Enter 2006 September

home *** CD-ROM | disk | FTP | other *** search

/ Enter 2006 September / Enter 09 2006.iso / Internet / SpamExperts Home 1.1 / SpamExperts Home.exe / lib / spamexperts.modules / spamexperts / ProxyClassifier.pyc (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2006-07-14 | 16.8 KB | 427 lines

# Source Generated with Decompyle++ # File: in.pyc (Python 2.4) from __future__ import division import sys import time import email import Queue import socket import xmlrpclib import threading import traceback import win32api import win32process from spambayes import Dibbler from spambayes.storage import NO_TRAINING_FLAG from spambayes.message import insert_exception_header import se_config from spamexperts import message from spamexperts import dnslookup from spamexperts.Options import options from spamexperts.fingerprint import fingerprint from spamexperts.dnsclassifier import DNSClassifier from spamexperts.OptionsClass import BLOCKED, DELAYED from spamexperts.OptionsClass import IS_HAM, IS_SPAM, IS_UNSURE, IS_UNKNOWN try: _ except NameError: _ = lambda x: x class ProxyClassifier(object): current_account = None HEADER_SIZE_FUDGE_FACTOR = 512 fingerprint_engine = fingerprint.Fingerprint() DNS = DNSClassifier() state = None fingerprints = 0 fingerprint_failures = 0 fingerprint_disabled = None processing_queue = Queue.Queue() processing_thread = None finish_processing = False processed_ids = [] def init(cls): cls.processing_thread = threading.Thread(target = cls.process_queue) cls.processing_thread.setDaemon(True) cls.processing_thread.start() init = classmethod(init) def _get_delayed_db(self): return self.state.delayed_messages[self.current_account] def _get_blocked_db(self): return self.state.blocked_messages[self.current_account] def _get_messages_to_delete(self): return self.state.delete_messages[self.current_account] def _set_delayed_db(self, value): self.state.delayed_messages[self.current_account] = value self.state.delayed_messages.store() def _set_blocked_db(self, value): self.state.blocked_messages[self.current_account] = value self.state.blocked_messages.store() def _set_messages_to_delete(self, value): self.state.delete_messages[self.current_account] = value self.state.delete_messages.store() delayed_db = property(_get_delayed_db, _set_delayed_db) blocked_db = property(_get_blocked_db, _set_blocked_db) messages_to_delete = property(_get_messages_to_delete, _set_messages_to_delete) def _classify_by_address(cls, msg): """Use the 'From' address to classify. """ addr_classifier = cls.state.address_classifier from_addr = msg['From'] (prob, clues) = addr_classifier.spamprob(from_addr, evidence = True) if prob == 1.0: return (1.0, IS_SPAM, clues) elif prob == 0.0: return (0.0, IS_HAM, clues) return (0.5, IS_UNKNOWN, clues) _classify_by_address = classmethod(_classify_by_address) def _classify_by_statistical(cls, msg): '''Use statistical analysis to classify. ''' minimum_ham = options[('Classifier', 'minimum_ham')] minimum_spam = options[('Classifier', 'minimum_spam')] if cls.state.bayes.nham < minimum_ham or cls.state.bayes.nspam < minimum_spam: clues = [ ('*HAMCOUNT*', cls.state.bayes.nham), ('*SPAMCOUNT*', cls.state.bayes.nspam)] return (0.5, IS_UNKNOWN, clues) (score, clues) = cls.state.bayes.spamprob(msg.tokenize(), evidence = True) if score > options[('Categorization', 'spam_cutoff')]: klass = IS_SPAM elif score < options[('Categorization', 'ham_cutoff')]: klass = IS_HAM else: klass = IS_UNKNOWN return (score, klass, clues) _classify_by_statistical = classmethod(_classify_by_statistical) def _classify_by_fingerprint(cls, msg): '''Use the fingerprinting system to classify. ''' FP = cls.fingerprint_engine.get_fingerprint(msg) if not FP: return (0.5, IS_UNKNOWN, [ ('fp-insufficient', '')]) mapping = { 'fingerprints': list(FP), 'user_id': se_config.spamexpertsConfig.user_id } if cls.fingerprint_disabled and time.time() > cls.fingerprint_disabled: cls.fingerprints = 0 cls.fingerprint_failures = 0 cls.fingerprint_disabled = None elif cls.fingerprints and cls.fingerprint_failures / cls.fingerprints > 0.29999999999999999: cls.fingerprint_disabled = time.time() + 3600 if cls.fingerprint_disabled: return (0.5, IS_UNKNOWN, [ ('fp-disabled-count', cls.fingerprints), ('fp-disabled-failure', cls.fingerprint_failures), ('fp-disabled-time', cls.fingerprint_disabled - time.time())]) old_timeout = socket.getdefaulttimeout() socket.setdefaulttimeout(10) cls.fingerprints += 1 for unused in xrange(3): try: result = cls.state.fingerprintclient.query(mapping) except socket.timeout: cls cls print >>sys.stderr, 'Connection to FP server timed out.' score = 0.5 clues = [ ('fp-timeout', socket.getdefaulttimeout())] cls.fingerprint_failures += 1 / 3 continue cls except (socket.error, xmlrpclib.Fault, StandardError): e = None print 'Error msg %s: Failed to query fingerprint server for %s.' % (e, msg.getId()) if options[('globals', 'verbose')]: (error_type, error, tb) = sys.exc_info() traceback.print_exception(error_type, error, tb) score = 0.5 clues = [] cls.fingerprint_failures += 1 / 3 continue cls elif result is None: score = 0.5 clues = [ ('fp-server-error', result)] cls.fingerprint_failures += 1 / 3 continue else: (match_count, matches) = result if match_count is None or match_count == '': score = 0.5 clues = [ ('fp-server-error', result)] cls.fingerprint_failures += 1 / 3 continue score = cls.fingerprint_engine.spamprob(match_count, FP) clues = [ ('FP:' + str(FP), float(match_count)), ('Matches:' + str(matches), score)] socket.setdefaulttimeout(old_timeout) if score > options[('Categorization', 'fingerprint_spam_cutoff')]: return (score, IS_SPAM, clues) return (score, IS_UNKNOWN, clues) _classify_by_fingerprint = classmethod(_classify_by_fingerprint) def _classify_by_DNS(cls, msg): (score, clues) = cls.DNS.spamprob(msg, True) if score > options[('Categorization', 'dns_spam_cutoff')]: return (score, IS_SPAM, clues) return (score, IS_UNKNOWN, clues) _classify_by_DNS = classmethod(_classify_by_DNS) def classify_message(cls, msg, flags = 0): '''Classify the given SEHeaderMessage, and return the classification. ''' (s_score, s_klass, s_clues) = cls._classify_by_statistical(msg) statistical = lambda unused: (s_score, s_klass, s_clues) if se_config.spamexpertsConfig.block_spam: systems = ((cls._classify_by_address, 'address', True), (statistical, 'statistical', False), (cls._classify_by_fingerprint, 'fingerprint', False), (cls._classify_by_DNS, 'dns', False)) else: systems = ((cls._classify_by_address, 'address', True), (statistical, 'statistical', False), (cls._classify_by_fingerprint, 'fingerprint', False)) final_score = 0.5 final_klass = IS_UNSURE final_clues = [] skip_training = False for classifier, description, skip in systems: start = time.time() (score, klass, clues) = classifier(msg) if options[('globals', 'verbose')]: print description, 'took', time.time() - start, 'seconds.' final_clues.extend(clues) final_clues.append((description, score)) if klass != IS_UNKNOWN: final_score = score final_klass = klass skip_training = skip break continue if final_klass == IS_UNKNOWN and options[('globals', 'verbose')]: print 'Final classification was unsure.' if not skip_training and not (flags & NO_TRAINING_FLAG) and not (final_klass == IS_UNSURE): if final_klass == IS_SPAM: cls.upload_fingerprint(msg) cls.train_statistical(msg, s_klass, final_klass) if not flags & NO_TRAINING_FLAG: cls.state.statistics.RecordClassification(final_score) return (final_score, final_klass, final_clues) classify_message = classmethod(classify_message) def upload_fingerprint(cls, msg): '''Upload the fingerprint for this message to the server. ''' cls.state.fingerprint_queue.put(msg) upload_fingerprint = classmethod(upload_fingerprint) def train_statistical(cls, msg, s_klass, klass): if s_klass == IS_UNKNOWN: if options[('globals', 'verbose')]: print 'Auto-training statistical system.' asSpam = klass == IS_SPAM cls.state.training_queue.put((msg, asSpam)) train_statistical = classmethod(train_statistical) def process_message(cls, messageText, uid, msg_info, current_account): msg_class = message.SEHeaderMessage msg = email.message_from_string(messageText, _class = msg_class) msg.setId(uid) try: (score, classification, clues) = cls.classify_message(msg) except: (messageText, details) = insert_exception_header(messageText) print >>sys.stderr, details del msg msg = email.message_from_string(messageText, _class = msg_class) msg.setId(uid) classification = IS_HAM score = 0.0 clues = [] result = classification if classification == IS_SPAM: classification = options[('Headers', 'header_spam_string')] old = cls.state.blocked_messages[current_account] old[msg.getId()] = msg_info cls.state.blocked_messages[current_account] = old cls.state.blocked_messages.store() msg.rememberBlockingState(current_account, BLOCKED) old = cls.state.delete_messages[current_account] old[msg.getId()] = msg_info cls.state.delete_messages[current_account] = old cls.state.delete_messages.store() elif classification == IS_UNSURE: classification = options[('Headers', 'header_unsure_string')] else: classification = options[('Headers', 'header_ham_string')] msg.rememberBlockingState(current_account, DELAYED) if msg_info: old = cls.state.delayed_messages[current_account] old[msg.getId()] = msg_info cls.state.delayed_messages[current_account] = old cls.state.delayed_messages.store() msg.RememberClassification(classification) msg.addHeaders(prob = score, clues = clues) cls.store_and_count_classified_message(msg, classification) return result process_message = classmethod(process_message) def store_and_count_classified_message(cls, msg, classification): '''Add message to the appropriate corpus, and count the number of ham/spam. ''' if classification == options[('Headers', 'header_spam_string')]: cls.state.numSpams += 1 corpus = cls.state.spamCorpus elif classification == options[('Headers', 'header_unsure_string')]: corpus = cls.state.unsureCorpus else: cls.state.numHams += 1 corpus = cls.state.hamCorpus message = corpus.makeMessage(msg.getId(), msg.as_string()) corpus.addMessage(message, observer_flags = NO_TRAINING_FLAG) store_and_count_classified_message = classmethod(store_and_count_classified_message) def process_queue(cls): '''Background message processing. This method is started when an object of this class is created (in the state) and run until the static variable finish_processing is True.''' below_normal = win32process.THREAD_PRIORITY_BELOW_NORMAL win32process.SetThreadPriority(win32api.GetCurrentThread(), below_normal) told_notifier = False counts = { IS_HAM: 0, IS_SPAM: 0, IS_UNSURE: 0 } while True: try: process_data = cls.processing_queue.get_nowait() (msg, uid, info, current_account) = process_data cls.state.model_notifier.SetBeginUpdating() told_notifier = True except Queue.Empty: if told_notifier: time.sleep(10) if not cls.processing_queue.empty(): continue if counts[IS_HAM] or counts[IS_UNSURE]: total_count = counts[IS_HAM] + counts[IS_UNSURE] if total_count != 1: plural = _('messages are') else: plural = _('message is') msg = _('%d %s waiting to be downloaded.') % (total_count, plural) if counts[IS_SPAM] == 1: msg += _(' 1 spam was blocked.') elif counts[IS_SPAM] > 1: msg += _(' %d spam were blocked.') % (counts[IS_SPAM],) snd = se_config.spamexpertsConfig.notify_sound notifier = cls.state.model_notifier notifier.SetEndUpdating(msg, snd) counts[IS_HAM] = 0 counts[IS_SPAM] = 0 counts[IS_UNSURE] = 0 else: cls.state.model_notifier.SetEndUpdating() told_notifier = False if cls.finish_processing: break time.sleep(1) continue if uid in cls.processed_ids: continue cls.processed_ids.append(uid) result = cls.process_message(msg, uid, info, current_account) counts[result] += 1 if options[('globals', 'verbose')]: print 'Processing queue ended.' process_queue = classmethod(process_queue) def get_blocking_welcome_message(username, server): server_dn = dnslookup.lookup[server] e = '%s@%s' % (username, server_dn) hr_diff = time.localtime()[3] - time.gmtime()[3] min_diff = time.localtime()[4] - time.gmtime()[4] received_time = time.strftime('%%d %%b %%Y %%H:%%M:%%S %+.2d%.2d' % (hr_diff, min_diff)) msg = ('Return-path: <info@spamexperts.com>', 'Received: (via local SpamExperts system); %s' % (received_time,), 'Envelope-to: info@spamexperts.com', 'X-URL: http://www.spamexperts.com/', _('Subject: Welcome to SpamExperts!'), 'From: SpamExperts <info@spamexperts.com>', 'Sender: info@spamexperts.com', 'Reply-To: info@spamexperts.com', 'To: %s' % (e,), 'Date: %s' % (time.strftime('%a, %d %B %Y %H:%M:%S'),), 'Message-ID: <spamexperts_welcome@%s>' % (server_dn,), '', _("SpamExperts will now block any 'spam' received for your %s email account from mail server %s.") % (username, server_dn), '', _('There is no need for you to change your email settings. When you normally check your email, our application first retrieves and filters all email from your mail account. After this the email is ready to be retrieved by your mail client.'), '', _('Whenever our application makes a mistake, you can easily correct this by opening our application (double click the SpamExperts icon at the bottom-right next to the Windows clock). Simply drag the misclassified email to the appropriate category.'), '', _('Please note that it may take up to 14 days for the application to have learned enough information to correctly classify your email.'), '', _('For more information or questions please visit http://www.spamexperts.com/'), '', _('The SpamExperts team')) return '\r\n'.join(msg).encode('latin-1') get_blocking_welcome_message = staticmethod(get_blocking_welcome_message) def get_welcome_message(username, server): server_dn = dnslookup.lookup[server] e = '%s@%s' % (username, server_dn) hr_diff = time.localtime()[3] - time.gmtime()[3] min_diff = time.localtime()[4] - time.gmtime()[4] received_time = time.strftime('%%d %%b %%Y %%H:%%M:%%S %+.2d%.2d' % (hr_diff, min_diff)) msg = ('Return-path: <info@spamexperts.com>', 'Received: (via local SpamExperts system); %s' % (received_time,), 'Envelope-to: info@spamexperts.com', 'X-URL: http://www.spamexperts.com/', _('Subject: Welcome to SpamExperts!'), 'From: SpamExperts <info@spamexperts.com>', 'Sender: info@spamexperts.com', 'Reply-To: info@spamexperts.com', 'To: %s' % (e,), 'Date: %s' % (time.strftime('%a, %d %B %Y %H:%M:%S'),), '', _("SpamExperts will classify any 'spam' received for your %s email account from mail server %s.") % (username, server_dn), '', _('There is no need for you to change your email settings. When you normally check your email, our application first retrieves and filters all email from your mail account. After this the email is ready to be retrieved by your mail client.'), '', _('Whenever our application makes a mistake, you can easily correct this by opening our application (double click the SpamExperts icon at the bottom-right next to the Windows clock). Simply drag the misclassified email to the appropriate category.'), '', _('Please note that it may take up to 14 days for the application to have learned enough information to correctly classify your email.'), '', _('For more information or questions please visit http://www.spamexperts.com/'), '', _('The SpamExperts team')) return '\r\n'.join(msg).encode('latin-1') get_welcome_message = staticmethod(get_welcome_message)